In [4]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import plotly.express as px
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode, iplot, plot
import plotly as py
from pywaffle import Waffle
import matplotlib.pyplot as plt
init_notebook_mode(connected=False)

#importing modeling libraries
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier
In [3]:
pip install xgboost
Collecting xgboost
  Using cached xgboost-1.6.0-py3-none-win_amd64.whl (126.1 MB)
Requirement already satisfied: numpy in c:\users\kumar\anaconda3\lib\site-packages (from xgboost) (1.20.1)
Requirement already satisfied: scipy in c:\users\kumar\anaconda3\lib\site-packages (from xgboost) (1.6.2)
Installing collected packages: xgboost
Successfully installed xgboost-1.6.0
Note: you may need to restart the kernel to use updated packages.
In [5]:
# NOTE(review): absolute, machine-specific path - this cell only runs where that file exists.
csv_path = "C:/Users/kumar/OneDrive/Documents/IV_J_COMPONENT/Space_Corrected.csv"
# Load the launches table and discard the two leftover index columns.
df = pd.read_csv(csv_path).drop(columns=['Unnamed: 0.1', 'Unnamed: 0'])
df.head()
Out[5]:
Company Name Location Datum Detail Status Rocket Rocket Status Mission
0 SpaceX LC-39A, Kennedy Space Center, Florida, USA Fri Aug 07, 2020 05:12 UTC Falcon 9 Block 5 | Starlink V1 L9 & BlackSky StatusActive 50.0 Success
1 CASC Site 9401 (SLS-2), Jiuquan Satellite Launch Ce... Thu Aug 06, 2020 04:01 UTC Long March 2D | Gaofen-9 04 & Q-SAT StatusActive 29.75 Success
2 SpaceX Pad A, Boca Chica, Texas, USA Tue Aug 04, 2020 23:57 UTC Starship Prototype | 150 Meter Hop StatusActive NaN Success
3 Roscosmos Site 200/39, Baikonur Cosmodrome, Kazakhstan Thu Jul 30, 2020 21:25 UTC Proton-M/Briz-M | Ekspress-80 & Ekspress-103 StatusActive 65.0 Success
4 ULA SLC-41, Cape Canaveral AFS, Florida, USA Thu Jul 30, 2020 11:50 UTC Atlas V 541 | Perseverance StatusActive 145.0 Success
In [6]:
def extract_country_name(location):
    """Return the text after the last comma of ``location``, stripped of whitespace.

    A string with no comma is returned whole (stripped).
    """
    _, _, tail = location.rpartition(',')
    return tail.strip()

# Lookup used to turn extracted location tails into consistent country names.
# Keys are the raw tails; values are the country name they are replaced with.
countries_dict = {
    # -> Russian Federation
    'Russia': 'Russian Federation',
    'Barents Sea': 'Russian Federation',
    # -> USA
    'New Mexico': 'USA',
    'Pacific Missile Range Facility': 'USA',
    'Gran Canaria': 'USA',
    # -> China
    'Yellow Sea': 'China',
    # -> Iran
    'Shahrud Missile Test Site': 'Iran',
}
In [9]:
# Country = last comma-separated field of Location, then normalised through countries_dict.
df['Country'] = df['Location'].apply(extract_country_name).replace(countries_dict)
In [10]:
#extracting date-time features
# Parse the launch timestamp once, then derive the calendar fields the later
# failure-rate and cost plots group by.
df['Datum'] = pd.to_datetime(df['Datum'])
df['year'] = df['Datum'].apply(lambda ts: ts.year)
df['month'] = df['Datum'].apply(lambda ts: ts.month)
# weekday() numbers the days Monday=0 ... Sunday=6
df['weekday'] = df['Datum'].apply(lambda ts: ts.weekday())
In [11]:
def getVehicles(detail):
    """Classify every '|'-separated segment of ``detail`` into a vehicle family.

    Each segment is stripped and matched by substring against a fixed list of
    family names; the first family that occurs in the segment is used, and a
    segment matching none of them is labelled 'Other'. Every segment is
    classified, so the returned list has one entry per segment.
    """
    # Order matters: an earlier family wins when a segment contains several names.
    families = (
        'Cosmos', 'Vostok', 'Tsyklon', 'Ariane', 'Atlas',
        'Soyuz', 'Delta', 'Titan', 'Molniya', 'Zenit',
        'Falcon', 'Long March', 'PSLV', 'GSLV', 'Thor',
    )
    segments = (part.strip() for part in detail.split('|'))
    return [
        next((family for family in families if family in segment), 'Other')
        for segment in segments
    ]
# One list of vehicle-family labels per row, built from the Detail text.
df['Launch Vehicles'] = df['Detail'].apply(getVehicles)
In [12]:
# Waffle chart (pywaffle): share of missions per 'Status Mission' value, in percent.
plt.rcParams['figure.figsize'] = (7,12)

status_share = df['Status Mission'].value_counts(normalize = True) * 100
data = dict(status_share)

# Legend entries show each status with its percentage to two decimals.
legend_labels = [f"{k} ({v:.2f}%)" for k, v in data.items()]
legend_style = {
    'loc': 'lower left',
    'bbox_to_anchor': (0, -0.4),
    'ncol': len(data),
    'framealpha': 0.3,
}
# NOTE(review): four colours are supplied - presumably one per status value; confirm against the data.
palette = ("#3bff3b", "#ff3b3b", "#ffff3b","#ff9d3b")

fig = plt.figure(
    FigureClass=Waffle,
    columns=10,
    values=data,
    colors=palette,
    title={'label': 'Status of Space Missions', 'loc': 'center'},
    icons='rocket',
    icon_size=20,
    labels=legend_labels,
    legend=legend_style,
)
plt.show()
In [13]:
# Table of how many rows (missions) fall under each Country value.
country_counts = dict(df['Country'].value_counts())

header_style = dict(
    values=['<b>Country Name</b>', '<b>Number of Space Missions</b>'],
    line_color='black',
    fill_color='darkorange',
    align='left',
    font=dict(color='black', size=14),
)
cell_style = dict(
    # first column: country names, second column: their counts
    values=[list(country_counts.keys()), list(country_counts.values())],
    line_color='black',
    fill_color='white',
    align='left',
    font=dict(color='black', size=13),
)
fig = go.Figure(data=[go.Table(header=header_style, cells=cell_style)])

fig.update_layout(
    width=500,
    height=450,
    margin=dict(l=80, r=80, t=25, b=10),
    title={'text': '<b>Number of Space Missions Per Launch Location</b>', 'x': 0.95},
    font_family='Fira Code',
    title_font_color='#ff0d00',
)
fig.show()
In [14]:
from sklearn.preprocessing import LabelEncoder
# Integer-encode the mission status labels; `colors` is keyed by those integer codes
# and is used to colour the per-country bars.
encoder = LabelEncoder().fit(df['Status Mission'])
colors = dict(enumerate(['red', 'Orange', 'Yellow', 'Green']))
In [15]:
# One bar chart per country showing the percentage split of its mission statuses.
# The grid is sized from the number of countries instead of a fixed 4x4, so a
# dataset with more than 16 countries no longer addresses a non-existent cell.
countries = list(df['Country'].unique())
n_cols = 4
n_rows = max(1, (len(countries) + n_cols - 1) // n_cols)  # ceiling division

fig = make_subplots(rows = n_rows, cols = n_cols, subplot_titles = countries)
for i, country in enumerate(countries):
    counts = df[df['Country'] == country]['Status Mission'].value_counts(normalize = True) * 100
    # bar colour is looked up through the encoder's integer code for each status
    color = [colors[x] for x in encoder.transform(counts.index)]
    trace = go.Bar(x = counts.index, y = counts.values, name = country,showlegend=False,marker={'color' : color})
    # fill the grid row by row
    fig.add_trace(trace, row = (i // n_cols) + 1, col = (i % n_cols) + 1)
fig.update_layout(template = 'plotly_dark',margin=dict(l=80, r=80, t=50, b=10),
                  title = { 'text' : '<b>Countries and Mission Status</b>', 'x' : 0.5},
                  # 250 px per row keeps the original 1000 px height for a 4-row grid
                  font_family = 'Fira Code',title_font_color= '#cacaca',height = 250 * n_rows,width = 1100)
# label the y axis only on the left-most subplot of every row
for row in range(1, n_rows + 1):
    fig.update_yaxes(title_text = 'Percentage',row = row, col = 1)
fig.show()
In [16]:
# Percentage of each company's missions that ended in 'Success' / 'Failure'.
# Computed as an aligned Series division rather than writing floats one element
# at a time into an integer count Series (an implicit dtype upcast that pandas
# has deprecated) and rescanning df once per company.
missions_per_company = df.groupby('Company Name').size()

# Only companies with at least one successful mission appear in successPerc.
success_counts = df[df['Status Mission'] == 'Success'].groupby('Company Name')['Status Mission'].count()
successPerc = (success_counts / missions_per_company.loc[success_counts.index] * 100).sort_index()

# Only companies with at least one failed mission appear in FailurePerc.
failure_counts = df[df['Status Mission'] == 'Failure'].groupby('Company Name')['Status Mission'].count()
FailurePerc = (failure_counts / missions_per_company.loc[failure_counts.index] * 100).sort_index()
In [17]:
# Grouped bars: each company's success rate next to its failure rate.
rate_series = (
    ('Success Rate of Companies', successPerc),
    ('Failure Rate of Companies', FailurePerc),
)
trace1, trace2 = (
    go.Bar(x=rates.index, y=rates.values, name=label, opacity=0.7)
    for label, rates in rate_series
)
fig = go.Figure([trace1, trace2])

# Legend is pinned inside the plot area, top-left.
legend_box = dict(yanchor="top", y=0.99, xanchor="left", x=0.01)
fig.update_layout(
    template='plotly_white',
    margin=dict(l=80, r=80, t=25, b=10),
    title={'text': '<b>Success and Failure Rates of Companies</b>', 'x': 0.5},
    font_family='Fira Code',
    title_font_color='#8000ff',
    width=1000,
    yaxis_title='<b>Percentage</b>',
    xaxis_title='<b>Companies</b>',
    legend=legend_box,
)

fig.show()
In [19]:
# Treemap nested as mission status -> country -> company.
hierarchy = ['Status Mission', 'Country', 'Company Name']
fig = px.treemap(df, path=hierarchy)
fig.update_layout(
    template='ggplot2',
    margin=dict(l=80, r=80, t=50, b=10),
    title={'text': '<b>Mission Status,Countries and Companies</b>', 'x': 0.5},
    font_family='Fira Code',
    title_font_color='#ff6767',
)
fig.show()
In [21]:
# creating a single list containing the names of the Launch Vehicles
# getVehicles() already holds the keyword-to-family chain; calling it here keeps
# this flat list and the per-row 'Launch Vehicles' column from drifting apart.
details = [vehicle for detail in df.Detail.values for vehicle in getVehicles(detail)]
In [22]:
# Bar chart of how often each vehicle-family label occurs in `details`, most frequent first.
counts = dict(pd.Series(details).value_counts(sort = True))
family_names = list(counts.keys())
family_totals = list(counts.values())

fig = go.Figure(go.Bar(x=family_names, y=family_totals))
fig.update_layout(
    template='ggplot2',
    margin=dict(l=80, r=80, t=50, b=10),
    title={'text': '<b>Number of Missions in each type of Launch Vehicle</b>', 'x': 0.5},
    font_family='Fira Code',
    title_font_color='#ff3434',
    yaxis_title='<b>Number of Missions</b>',
    xaxis_title='<b>Launch Vehicle</b>',
)
fig.show()
In [25]:
# Failed missions as a percentage of all missions, per year / month / weekday,
# each with a dashed line at the mean of those percentages.
MONTH_NAMES = ['January', 'February', 'March', 'April', 'May','June', 'July', 'August','September','October', 'November', 'December']
WEEKDAY_NAMES = ['Monday', 'Tuesday', 'Wednesday','Thursday','Friday','Saturday','Sunday']

failed_missions = df[df['Status Mission'] == 'Failure']

fig = make_subplots(rows = 3, cols = 1)
for i, period in enumerate(['year', 'month', 'weekday']):
    total_counts = df[period].value_counts().sort_index()
    # A period that has missions but no failures is 0 %, not missing: without the
    # fill the division yields NaN there and the mean below becomes NaN as well.
    failure_counts = failed_missions[period].value_counts().reindex(total_counts.index, fill_value = 0)
    failure_pct = failure_counts / total_counts * 100.0
    mean = failure_pct.mean()

    # Build labels from the index actually present instead of assuming that all
    # 12 months / 7 weekdays occur, so x and y always have the same length.
    if(period == 'year'):
        x = list(failure_pct.index)
    elif(period == 'month'):
        x = [MONTH_NAMES[int(m) - 1] for m in failure_pct.index]   # month is 1-based
    else:
        x = [WEEKDAY_NAMES[int(d)] for d in failure_pct.index]     # weekday is 0-based, Monday first

    trace1 = go.Scatter(x = x, y = list(failure_pct.values),mode = 'lines',text = list(failure_pct.index),name = f'Failures in each {period}',connectgaps = False)
    trace2 = go.Scatter(x = x, y = [mean]*len(failure_pct), mode = 'lines',showlegend=False,name = f'Mean failures over the {period}s',line = {'dash':'dash','color':'grey'})
    # add_trace with row/col replaces the deprecated append_trace
    fig.add_trace(trace1, row = i+1, col = 1)
    fig.add_trace(trace2, row = i+1, col = 1)
fig.update_layout(template = 'simple_white',height = 600,
                  title = { 'text' : '<b>Failed Missions as a percentage of total missions in that period</b>', 'x' : 0.5})
for i in range(1,4):
    fig.update_yaxes(title_text = '<b>Percentage</b>',row = i, col = 1)
fig.show()
In [24]:
# Parse the launch timestamp and derive year / month / weekday columns from it.
df['Datum'] = pd.to_datetime(df['Datum'])
launch_times = df['Datum']
df['year'] = launch_times.apply(lambda stamp: stamp.year)
df['month'] = launch_times.apply(lambda stamp: stamp.month)
# weekday() numbers the days Monday=0 ... Sunday=6
df['weekday'] = launch_times.apply(lambda stamp: stamp.weekday())
In [26]:
# Make the ' Rocket' column numeric: drop thousands separators, cast to float,
# and replace missing values with 0.
rocket_text = df[' Rocket'].apply(lambda raw: str(raw).replace(',',''))
df[' Rocket'] = rocket_text.astype('float64').fillna(0)
In [27]:
# Mean ' Rocket' value per launch year, skipping rows where it is 0 (the value
# used to fill missing entries when the column was cleaned).
costDict = dict(df[df[' Rocket'] > 0].groupby('year')[' Rocket'].mean())
# The trace stays on the default y axis: it was previously bound to 'y2' while
# the axis title below is set on the primary axis, so the title labelled an
# axis that carried no data.
fig = go.Figure(go.Scatter(x = list(costDict.keys()), y = list(costDict.values()),mode = 'lines',showlegend=False,name = 'Average Mission Cost Over the years'))
fig.update_layout(template = 'plotly_dark',margin=dict(l=80, r=80, t=50, b=10),
                  title = { 'text' : '<b>Average Mission Cost Over the years</b>', 'x' : 0.5},
                 font_family = 'Fira Code',title_font_color= '#cacaca',
                 yaxis_title = '<b>Cost of Mission in Million Dollars</b>',xaxis_title = '<b>Year of Launch</b>',)
fig.show()
In [28]:
# Bubble chart: one marker per mission whose ' Rocket' value lies between 1 and 4999,
# placed by year and country, coloured by status and sized by the ' Rocket' value.
priced_missions = df[df[' Rocket'].between(1,4999)]
fig = px.scatter(
    priced_missions,
    x='year',
    y='Country',
    color='Status Mission',
    size=' Rocket',
    size_max=30,
)
fig.update_layout(
    template='simple_white',
    margin=dict(l=80, r=80, t=50, b=10),
    title={'text': '<b>Average Mission Cost Over the years For Various Countries</b>', 'x': 0.5},
    font_family='Fira Code',
    title_font_color='#00b300',
)
fig.show()
In [29]:
# Bubble chart: one marker per mission whose ' Rocket' value lies between 1 and 4999,
# placed by year and company, coloured by status and sized by the ' Rocket' value.
priced_missions = df[df[' Rocket'].between(1,4999)]
fig = px.scatter(
    priced_missions,
    x='year',
    y='Company Name',
    color='Status Mission',
    size=' Rocket',
    size_max=30,
)
fig.update_layout(
    template='simple_white',
    margin=dict(l=80, r=80, t=50, b=10),
    title={'text': '<b>Average Mission Cost Over the years For Various Companies</b>', 'x': 0.5},
    font_family='Fira Code',
    title_font_color='#00b300',
    height=650,
)
fig.show()
In [30]:
# NOTE(review): this cell draws the same year-vs-company bubble chart as the cell before it.
in_range = df[' Rocket'].between(1,4999)
scatter_options = dict(
    x='year',
    y='Company Name',
    color='Status Mission',
    size=' Rocket',
    size_max=30,
)
fig = px.scatter(df[in_range], **scatter_options)

layout_options = dict(
    template='simple_white',
    margin=dict(l=80, r=80, t=50, b=10),
    title={'text': '<b>Average Mission Cost Over the years For Various Companies</b>', 'x': 0.5},
    font_family='Fira Code',
    title_font_color='#00b300',
    height=650,
)
fig.update_layout(**layout_options)
fig.show()
In [ ]: